Biomarker discovery via multipratt gives some indications for taxa specific for fish, bacterioplankton and seasonal sampling. Overall it seems adviced to run several biomarker discovery techniques in parallel but in this variable dataset they give little insides and are more meant as a backup for the network analysis.

1 Global Setup

1.1 Path

before you start: .rs.restartR()

1.2 Packages

1.3 Functions

1.4 Input Files

1.5 Tutorials

1.6 Setup Analysis

#-

7 Bacterial Biomarkers

7.0 Abundance Heatmap

################################
#BacteriaHeatPlot Visualization# Fish
################################
    require(phyloseq)
    require(ggplot2)
    require(tidyverse)
    require(cowplot)
  rowclusternum        <- 6
  columnclusternum     <- 7
  FILENAME    <- paste(paste("Cluster_Heatmap_Species_ALL", Type, sep="_"), ".png", sep="")
  TITLE       <- draw_label_themeRKwhite(paste(""), element = "plot.title", x =  0.05, hjust = 0, vjust = 1)
 
  #CLR         <-  as.data.frame(SummarizedExperiment::assay(pslist$TSEc.l.r_SSU))
  CoreSet <- "Fish"
  
  ps <- merge_phyloseq(pslist$ps_OE, pslist$ps_GC, pslist$ps_WF)
  ps_clr <- microbiome::transform(ps, "clr")
  CLR <- as.data.frame(SummarizedExperiment::assay(mia::makeTreeSummarizedExperimentFromPhyloseq(ps_clr)))
  
  SampleOrder <- SSU_Samples

  SAMDF       <- SAMDF16S[SAMDF16S$SampleID %in% rownames(sample_data(ps)),]
  WIDTH = length(SAMDF$SampleID) *0.04
  HEIGHT = 10
   
  IndicatorSpecies_HeatPlot <- BacteriaHeatPlotRK7_withCore(OmicsData = CLR, Species = Species, genes_of_interest = rownames(CLR), Samples = colnames(CLR), SAMDF = SAMDF,  TITLE = TITLE, filename= FILENAME, OutlineColor=OutlineColor,  SHOW_ROW_NAMES = T, SHOW_ROW_NAMES_ALL = F,  ZScore =  F) 
## [1] "provide relative abundance data"

7.1 Indicator species/multipratt

https://cran.r-project.org/web/packages/indicspecies/vignettes/IndicatorSpeciesAnalysis.html

#install.packages("indicspecies")
IndicatorSpecies_list <- list()
for (Dataset in names(pslist)[!grepl("WF", names(pslist))][grepl("ps_SSU|ps_GC|ps_OE|ps_Fish", names(pslist)[!grepl("WF", names(pslist))])]) {
  
    require(indicspecies)
    require(phyloseq)
    require(ggplot2)
    require(tidyverse)
    require(cowplot)
    Datasetname <- sub("ps_", "", Dataset)
    IndicatorSpecies_list_length <- length(IndicatorSpecies_list)
  
    if (Datasetname %in% c("Fish")) {
      ps <- merge_phyloseq(pslist$ps_OE, pslist$ps_GC)
      Comparison  <- "Species"
      Interested_Comparisons <- c(
        "GC", 
        "OE"
      )
    } else if (Datasetname %in% c("SSU")) {
      ps <- merge_phyloseq(pslist$ps_OE, pslist$ps_GC, pslist$ps_WF)
      #ps <- filter_taxa(ps, function (x) {sum(x) > 50}, prune=TRUE)
      Comparison  <- "Kind"
      Interested_Comparisons <- c(
        "Fish",
        "Water"
        )
    } else if (Datasetname %in% c("OE", "GC")) {
      ps <- pslist[[Dataset]]
      Comparison  <- "Season"
      Interested_Comparisons <- c(
      "Spring_22",
      "Summer_21",
      "Winter_22",
      "Autumn_21"
      )
    
    }
    
    result <- indicspecies::multipatt(
      otu_table(ps), 
      sample_data(ps)[[paste(Comparison)]], 
      #func = "IndVal.g", 
      control = how(nperm = 999))
  
    df_list <- split(result$sign, result$sign$index)
    Groups <- as.vector(dimnames(result$comb))[[2]]
    
    for (i in seq_along(names(df_list))) {
      names(df_list)[[i]] <- Groups[i]}

    filtered_df_list <- list()

    for (group_name in names(df_list)[-length(df_list)]) {
      filtered_df <- df_list[[group_name]]
      filtered_df <- filtered_df[filtered_df$p.value < 0.05,]
      filtered_df_list[[group_name]] <- filtered_df}
    
    filtered_df_list[[names(df_list)[length(df_list)]]] <- df_list[[length(df_list)]]

    ordered_df_list <- lapply(filtered_df_list, function(df) df[order(df$stat, decreasing = T), ])
    #cat(paste(shQuote(names(ordered_df_list), type="cmd"), collapse=", ")) 
    
    ordered_df_list_intest <- ordered_df_list[names(ordered_df_list) %in% Interested_Comparisons]
    ordered_df_list_intest_0.7 <-list()
    
    for (group_name in names(ordered_df_list_intest)) {
      filtered_df <- df_list[[group_name]]
      filtered_df <- filtered_df[filtered_df$stat > 0.7, ]
      ordered_df_list_intest_0.7[[group_name]] <- filtered_df
    }
    
    print(Datasetname)
    lapply(ordered_df_list_intest, dim)
    lapply(ordered_df_list_intest_0.7, dim)

    IndicatorSpecies_list[[IndicatorSpecies_list_length+1]] <- ordered_df_list_intest_0.7
    names(IndicatorSpecies_list)[[IndicatorSpecies_list_length+1]] <- paste(Datasetname, Comparison, sep="_")
}
## [1] "SSU"
## [1] "Fish"
## [1] "OE"
## [1] "GC"
pslist$IndicatorSpecies_list <- IndicatorSpecies_list

ps <- merge_phyloseq(pslist$ps_OE, pslist$ps_GC, pslist$ps_WF)

SSU_taxa <- rownames(tax_table(ps))
SSU_taxa <- setdiff(SSU_taxa, c(rownames(IndicatorSpecies_list$SSU_Kind$Fish), rownames(IndicatorSpecies_list$SSU_Kind$Water)))
pslist$ps_WF
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 2364 taxa and 19 samples ]
## sample_data() Sample Data:       [ 19 samples by 61 sample variables ]
## tax_table()   Taxonomy Table:    [ 2364 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 2364 reference sequences ]
##############
#Add Taxonomy#
##############
  InDat_list_Species <- IndicatorSpecies_list$Fish_Species
  for (IndDat in names(InDat_list_Species)) {
    InDat_list_Species[[IndDat]]$ASV <- rownames(InDat_list_Species[[IndDat]])
    InDat_list_Species[[IndDat]] <- left_join(InDat_list_Species[[IndDat]],
    as.data.frame(tax_table(merge_phyloseq(pslist$ps_OE, pslist$ps_GC, pslist$ps_WF))) %>% 
      mutate(ASV = rownames(.)))}

  InDat_list_Kind <- IndicatorSpecies_list$SSU_Kind
  for (IndDat in names(InDat_list_Kind)) {
    InDat_list_Kind[[IndDat]]$ASV <- rownames(InDat_list_Kind[[IndDat]])
    InDat_list_Kind[[IndDat]] <- left_join(InDat_list_Kind[[IndDat]],
    as.data.frame(tax_table(merge_phyloseq(pslist$ps_Fish, pslist$ps_WF))) %>% 
      mutate(ASV = rownames(.)))}

  
#############################
#Extract Bacterial Biomarker#
#############################
  lapply(InDat_list_Species, dim)
## $GC
## [1]  9 13
## 
## $OE
## [1] 11 13
  CompsOfInterest_Fish<- c(
    "GC","OE", "GC+OE")
  IndicatorTaxa_Fish <- unique(as.vector(unlist(lapply(InDat_list_Species[
                            names(InDat_list_Species) %in% CompsOfInterest_Fish],function(df) df[["ASV"]]))))
  
  CompsOfInterest_Water<- c("Water")
  IndicatorTaxa_Water <- unique(as.vector(unlist(lapply(InDat_list_Kind[
                            names(InDat_list_Kind) %in% CompsOfInterest_Water],function(df) df[["ASV"]]))))
  
  CompsOfInterest_Season <- c(
      "Spring_22", 
      "Summer_21",
      "Winter_22",
      "Autumn_21")
  InDat_list_OE_Season <- IndicatorSpecies_list$OE_Season
      for (IndDat in names(InDat_list_OE_Season )) {
      InDat_list_OE_Season [[IndDat]]$ASV <- rownames(InDat_list_OE_Season [[IndDat]])
      InDat_list_OE_Season [[IndDat]] <- left_join(InDat_list_OE_Season [[IndDat]],
      as.data.frame(tax_table(pslist$ps_OE)) %>% mutate(ASV = rownames(.)))}
  IndicatorTaxa_OE_Season <- unique(as.vector(unlist(lapply(InDat_list_OE_Season[
                            names(InDat_list_OE_Season) %in% CompsOfInterest_Season],function(df) df[["ASV"]])
                            )))                            
  InDat_list_GC_Season <- IndicatorSpecies_list$GC_Season
      for (IndDat in names(InDat_list_GC_Season )) {
      InDat_list_GC_Season [[IndDat]]$ASV <- rownames(InDat_list_GC_Season [[IndDat]])
      InDat_list_GC_Season [[IndDat]] <- left_join(InDat_list_GC_Season [[IndDat]],
      as.data.frame(tax_table(pslist$ps_GC)) %>% mutate(ASV = rownames(.)))}
  IndicatorTaxa_GC_Season <- unique(as.vector(unlist(lapply(InDat_list_GC_Season[
                            names(InDat_list_GC_Season) %in% CompsOfInterest_Season],function(df) df[["ASV"]])
                            )))    
  
  IndicatorTaxa_list <- list(
    "IndicatorTaxa_Fish" = IndicatorTaxa_Fish, 
    "IndicatorTaxa_Water" = IndicatorTaxa_Water, 
    "IndicatorTaxa_OE_Season" =IndicatorTaxa_OE_Season, 
    "IndicatorTaxa_GC_Season" =IndicatorTaxa_GC_Season
  )
    
  

################################
#BacteriaHeatPlot Visualization# OE
################################
  rowclusternum        <- 4
  columnclusternum     <- 4
  FILENAME    <- paste(paste("Indicator_Species_OE_Season", Type, "LCF0", sep="_"), ".png", sep="")
  TITLE       <- draw_label_themeRKwhite(paste(""), element = "plot.title", x =  0.05, hjust = 0, vjust = 1)
 
  #CLR         <-  as.data.frame(SummarizedExperiment::assay(pslist$TSEc.l.r_SSU))
  CoreSet <- "OE"
  
  ps <- pslist$ps_OE
  ps_clr <- microbiome::transform(ps, "clr")
  CLR <- as.data.frame(SummarizedExperiment::assay(mia::makeTreeSummarizedExperimentFromPhyloseq(ps_clr)))
  
  SampleOrder <- SSU_Samples

  SAMDF       <- SAMDF16S[SAMDF16S$SampleID %in% rownames(sample_data(ps)),]
  WIDTH = 4 + length(SAMDF$SampleID) *0.04
  HEIGHT = 5
   
  #IndicatorTaxa_Reps <- unique(as.vector(unlist(lapply(InDat_list[
  #                          names(InDat_list) %in% CompsOfInterest],function(df) df[["ASV"]]))))

  IndicatorTaxa <- IndicatorTaxa_list$IndicatorTaxa_OE_Season

  IndicatorSpecies_HeatPlot <- BacteriaHeatPlotRK7_withCore(OmicsData = CLR, Species = CoreSet, genes_of_interest = IndicatorTaxa, Samples = colnames(CLR), SAMDF = SAMDF,  TITLE = TITLE, filename= FILENAME, OutlineColor=OutlineColor,  SHOW_ROW_NAMES = T, SHOW_ROW_NAMES_ALL = F,  ZScore =  F) 
## [1] "provide relative abundance data"

################################
#BacteriaHeatPlot Visualization# GC
################################
  rowclusternum        <- 4
  columnclusternum     <- 4
  FILENAME    <- paste(paste("Indicator_Species_GC_Season", Type, "LCF0", sep="_"), ".png", sep="")
  TITLE       <- draw_label_themeRKwhite(paste(""), element = "plot.title", x =  0.05, hjust = 0, vjust = 1)
 
  #CLR         <-  as.data.frame(SummarizedExperiment::assay(pslist$TSEc.l.r_SSU))
  CoreSet <- "GC"
  
  ps <- pslist$ps_GC
  ps_clr <- microbiome::transform(ps, "clr")
  CLR <- as.data.frame(SummarizedExperiment::assay(mia::makeTreeSummarizedExperimentFromPhyloseq(ps_clr)))
  
  SampleOrder <- SSU_Samples

  SAMDF       <- SAMDF16S[SAMDF16S$SampleID %in% rownames(sample_data(ps)),]
  WIDTH = 6 + length(SAMDF$SampleID) *0.04
  HEIGHT = 5
   
  #IndicatorTaxa_Reps <- unique(as.vector(unlist(lapply(InDat_list[
  #                          names(InDat_list) %in% CompsOfInterest],function(df) df[["ASV"]]))))

  IndicatorTaxa <- IndicatorTaxa_list$IndicatorTaxa_GC_Season

  IndicatorSpecies_HeatPlot <- BacteriaHeatPlotRK7_withCore(OmicsData = CLR, Species = CoreSet, genes_of_interest = IndicatorTaxa, Samples = colnames(CLR), SAMDF = SAMDF,  TITLE = TITLE, filename= FILENAME, OutlineColor=OutlineColor,  SHOW_ROW_NAMES = T, SHOW_ROW_NAMES_ALL = F,  ZScore =  F) 
## [1] "provide relative abundance data"

7.1.1 Extract

IndicSpecies_list <- list()

for (Dataset in names(IndicatorSpecies_list)) {
  
    Datasetname <- sub("_Season|_Species|_Kind", "", Dataset)
  
    if (Datasetname %in% c("Fish")) {
      ps <- merge_phyloseq(pslist$ps_OE, pslist$ps_GC)
      Comparison  <- "Species"
      Interested_Comparisons <- c(
        "GC", 
        "OE"
      )
    } else if (Datasetname %in% c("SSU")) {
      ps <- merge_phyloseq(pslist$ps_OE, pslist$ps_GC, pslist$ps_WF)
      #ps <- filter_taxa(ps, function (x) {sum(x) > 50}, prune=TRUE)
      Comparison  <- "Kind"
      Interested_Comparisons <- c(
        "Fish",
        "Water"
        )
    } else if (Datasetname %in% c("OE", "GC")) {
      ps <- pslist[[paste("ps",Datasetname, sep="_")]]
      Comparison  <- "Season"
      Interested_Comparisons <- c(
      "Spring_22", 
      "Summer_21",
      "Winter_22",
      "Autumn_21"
      )
    }

    for (IndDat in names(IndicatorSpecies_list[[Dataset]])) {
      
      df <- IndicatorSpecies_list[[Dataset]][[IndDat]]
      
      ps_sub <- prune_samples(sample_names(ps)[sample_data(ps)[[Comparison]] == IndDat], ps)
      
      df <- df %>% 
        mutate(ASV = rownames(.)) %>% 
        left_join(
          as.data.frame(tax_table(ps_sub)) %>% 
          mutate(ASV = rownames(.))
          ) %>% 
        left_join(as.data.frame(t(otu_table(ps_sub %>%
          transform_sample_counts(function(x) {x/sum(x)*100})))) %>% 
          dplyr::mutate(ASV = rownames(.))
          ) %>% 
        left_join(
          as.data.frame(t(otu_table(ps_sub %>%
          transform_sample_counts(function(x) {x/sum(x)*100})))) %>% 
          dplyr::mutate(ASVmeans = rowMeans(.)) %>%
          dplyr::mutate(ASV = rownames(.)) %>% 
          dplyr::select(ASV, ASVmeans)
          ) %>% 
        dplyr::arrange(desc(ASVmeans))  %>%
        dplyr::select(ASVmeans, everything())
    
      rownames(df) <- df$ASV
        
      write.csv2(df, file.path(path_Output_16S,paste(paste(save_name,  "IndicatorSpecies", Datasetname , Comparison, IndDat, Date, sep="_"), ".csv", sep="")))
  }
}

7.1.2 Barplots

Indicator taxa are very rare

############
#Multipratt#
############
GroupOfInterest <- na.omit(unique(rownames(IndicatorSpecies_list$OE_Season$Autumn_21)))
AddName <- "multipratt_OE_Autumn21"
TaxLevel <- "ASV"

flipped <- T
for (Dataset in names(pslist)[grepl("ps_GCWF|ps_OEWF", names(pslist))]) {
  
  require(plyr); require(dplyr); require(ggrepel); require(cowplot); require(phyloseq)
     
    Datasetname <- sub("ps_", "", Dataset)
    
  
    # GroupOfInterest_OE <- na.omit(unique(WGCNA_list[["OE"]]$SSUWGCNAlist[["SSU7"]]$ASV))
    # GroupOfInterest_GC <- na.omit(unique(WGCNA_list[["GC"]]$SSUWGCNAlist[["SSU2"]]$ASV))
    # print(paste(sum(GroupOfInterest_OE %in% GroupOfInterest_GC)/length(GroupOfInterest_OE)*100, "% of OE Module SSU7 are in GC Module SSU2"))
    # 
    # GroupOfInterest <- c(GroupOfInterest_OE, GroupOfInterest_GC)
    # #GroupOfInterest <- sub(".*:", "", GroupOfInterest)  
    # GroupOfInterest <-unique(GroupOfInterest)
    
    ps <- pslist[[Dataset]] 

    FILENAME    <- paste(paste(save_name, "Alpha_BarPlot_WF", AddName, Datasetname,  sep="_"), ".png", sep="")
    
    if (Datasetname %in% c("GC", "OE")) {
      WIDTH <- 10 + length(sample_names(pslist[[Dataset]])) *0.12
      HEIGHT <- 10 #length(sample_names(pslist[[Dataset]])) *0.08
    } else if (Datasetname %in% c("WF")) {
      WIDTH <-  5 + length(sample_names(pslist[[Dataset]])) *0.12
       HEIGHT <- 10 #length(sample_names(pslist[[Dataset]])) *0.08
    } else {
     WIDTH <-  10 + length(sample_names(pslist[[Dataset]])) *0.12
      HEIGHT <- 10 #length(sample_names(pslist[[Dataset]])) *0.08
    }
    
  
    ################################
    #Create Relative Abundance Data#
    ################################
    ps_alpha_barplot <- ps %>%
    #tax_glom(taxrank =  TaxLevel)   %>%  
    transform_sample_counts(function(x) {x/sum(x)*100}) %>% # Transform to rel. abundance
    psmelt() %>%                                         # Melt to long format
    #filter(Abundance > 1) %>%                       # Filter out low abundance taxa
    arrange(Genus)        %>%                                # Sort data frame alphabetically by phylum
    dplyr::arrange(desc(Abundance))
 
  
    ps_alpha_barplot$ASV <- sub(".*:", "", ps_alpha_barplot$OTU)  
    #ps_alpha_barplot$ASV <- sub('\\.', ' ', ps_alpha_barplot$ASV)
    
    ############################
    #Create TotalAbundance Data#
    ############################
    # phylum_abundance <- ps_alpha_barplot %>%
    #   dplyr::group_by(.data[[TaxLevel]]) %>%
    #   dplyr::summarise(TotalAbundance = sum(Abundance))
    # ordered_levels <- phylum_abundance %>%
    #   dplyr::arrange(desc(TotalAbundance)) %>%
    #   pull(.data[[TaxLevel]])
    #   
    # ps_alpha_barplot$Taxa <- factor(ps_alpha_barplot[[TaxLevel]], levels = ordered_levels)

    ps_alpha_barplot$SampleID <- factor(ps_alpha_barplot$SampleID, levels = 
                                          SSU_Samples[SSU_Samples %in% sample_names(pslist[[Dataset]])])
  
    
    SSU_Samples[SSU_Samples %in% sample_names(pslist[[Dataset]])]
    
    ##########################
    #Subset for Interest ASVs#
    ##########################
    df <- ps_alpha_barplot #[ps_alpha_barplot$OTU %in% GroupOfInterest,]
    #df <- df[df$OTU %in% GroupOfInterest,]
    df$Abundance[!(df$OTU %in% GroupOfInterest)] <- 0
    
    phylum_abundance <- df %>%
      dplyr::group_by(.data[[TaxLevel]]) %>%
      dplyr::summarise(TotalAbundance = sum(Abundance))
    ordered_levels <- phylum_abundance %>%
      dplyr::arrange(desc(TotalAbundance)) %>%
      pull(.data[[TaxLevel]])
      
    df$Taxa <- factor(df[[TaxLevel]], levels = ordered_levels)
    
    Alpha_Diversity_df <- df
    Alpha_Diversity_df$Colors <- NA
    Alpha_Diversity_df$Reps <-  factor(Alpha_Diversity_df$Reps, levels = RepOrder[RepOrder %in% Alpha_Diversity_df$Reps])
    
    #################################
    #Update to colored_taxonomy_Fish#
    #################################

    taxa_levels <- c("Genus", "Family", "Order", "Class", "Phylum")
    taxa_level2 <- c("Taxa", "Phylum2")

    Alpha_Diversity_df$Color <- "grey"

  # Loop through each row of Alpha_Diversity_df
  # for (i in 1:nrow(Alpha_Diversity_df)) {
  #   matching_color <- NA  # Initialize matching color to NA
  #   # Loop through each taxonomic level
  #   for (level in taxa_levels) {
  #   matching_row <- colored_taxonomy_Fish[colored_taxonomy_Fish$Taxa %in%
  #                                           Alpha_Diversity_df[[level]][i], ]
  #   # If there's a match, update matching_color and exit the loop
  #   if (nrow(matching_row) > 0) {
  #     matching_color <- matching_row$Color
  #     break
  #   }
  # }
  # # If there's no match at any level, try matching without numbers
  # if (is.na(matching_color)) {
  #   matching_row <- colored_taxonomy_Fish[gsub("\\d", "", colored_taxonomy_Fish$Phylum2) %in%
  #                                           Alpha_Diversity_df[["Phylum"]][i], ]
  #   if (nrow(matching_row) > 0) {
  #     matching_color <- matching_row$Color
  #   }
  # }
  # # Assign the matching color to the corresponding row in Alpha_Diversity_df
  # Alpha_Diversity_df$Color[i] <- matching_color
  # }
    
    color_mapping <- setNames(Alpha_Diversity_df$Color, Alpha_Diversity_df$Taxa)
    color_mapping[["Candidatus Megaira"]] <- "purple"
    color_mapping[["Acinetobacter.lwoffii"]] <- "#996600"
    color_mapping[["Acinetobacter.johnsonii"]] <- "#663300"
    color_mapping[["Shewanella"]] <- "#FF3366"
    color_mapping[["Shewanella.baltica"]] <- "#FF0099"
    color_mapping[["Shewanella.putrefaciens"]] <-"#FF0066"
    color_mapping[["Photobacterium"]] <- "#FFF000"
    color_mapping[["Aeromonas"]] <- "#FFCC00"
    

    levels(Alpha_Diversity_df$Taxa)[!levels(Alpha_Diversity_df$Taxa) %in% head(unique(sub(".*:", "", GroupOfInterest)), 34)  ] <-
      "Other"

    if (flipped == FALSE) {
      if (Datasetname == "WF") {
        p <- ggplot(Alpha_Diversity_df, 
          aes(x = SampleID, y = Abundance, fill = factor(Taxa))) + 
          geom_bar(stat = "identity") +
          facet_grid(. ~ factor(Season, levels = SeasonOrder), drop = TRUE, scale = "free", 
                     space = "free_x")
        COL <- col.Palette$col.Palette.Season[names(col.Palette$col.Palette.Season) %in%
                                              unique(sample_data(ps)$Season)]
      } else {
       p <- ggplot(Alpha_Diversity_df, 
        aes(x = SampleID, y = Abundance, fill = factor(Taxa))) + 
        geom_bar(stat = "identity") +
        facet_grid(. ~ factor(Reps, levels = RepOrder), drop = TRUE, scale = "free", space = "free_y")
      COL <- col.Palette$col.Palette.Reps[names(col.Palette$col.Palette.Reps) %in%                                    unique(sample_data(ps)$Reps)]
      }
      p <- p + 
        atheme +
        ylab("Relative Abundance [%] \n") + xlab("") + 
        labs(fill="") +
        scale_fill_manual(values = color_mapping) +
        ylim(0, 60) +
  
        #ggrepel::geom_text_repel(x = df$SampleID, y = df$Abundance,  aes(label = Labels),
        #inherit.aes = FALSE, min.segment.length = 0,nudge_y = 20,size=3, max.overlaps = Inf) +
      #awhite + 
        theme(strip.text.y = element_text(angle = 0))  +
        #theme(axis.text.x=element_blank(),
        #axis.text.y=element_blank(),
        #axis.title.y.left = element_blank(),
        #axis.ticks.y =element_blank() 
        #) +
        theme(
         panel.background = element_rect(fill='transparent'), #transparent panel bg
         plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
         #legend.background = element_rect(fill='transparent'), #transparent legend bg
         #legend.box.background = element_rect(fill='transparent') #transparent legend panel
         ) +
      theme(axis.title.x.bottom =  element_text(color="grey13"), 
        strip.text = element_text(color = "black", face= "bold")) +
      theme(
        legend.title=element_text(size=9), 
        legend.text=element_text(size=9), 
        axis.text.x.bottom = element_text(size= 9, face = "bold", angle = 45, hjust = 1),
        strip.text.y.left = element_text(size = 9,face = "bold"), 
        axis.title.y.left = element_text(size=12,face = "bold"))
      
      g <- ggplot_gtable(ggplot_build(p))
        stripr <- which(grepl('strip-t', g$layout$name))
        fills <- alpha(COL, 0.8)
        k <- 1
        for (iii in stripr) {
        j <- which(grepl('rect', g$grobs[[iii]]$grobs[[1]]$childrenOrder))
        g$grobs[[iii]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
        k <- k+1}
    
        Alpha_Diversity_BarPlot <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
        Alpha_Diversity_BarPlot <- plot_grid(Alpha_Diversity_BarPlot, ncol = 1, rel_heights = c(100))
        ggsave(Alpha_Diversity_BarPlot, filename = FILENAME, path = pathPlots, device='png', dpi=300,
               width = WIDTH, height = 6)
        
    } else if (flipped == TRUE) {
      # New facet label names for supp variable
    
      COL <- col.Palette$col.Palette.Reps[names(col.Palette$col.Palette.Reps) %in%                                    unique(sample_data(ps)$Reps)]
      
      Short.labs <- gsub("[^0-9]", "", Alpha_Diversity_df$Reps)
      names(Short.labs) <- Alpha_Diversity_df$Reps

      
      p <- ggplot(Alpha_Diversity_df, aes(x = SampleID, y = Abundance, fill = factor(Taxa))) + 
            geom_bar(stat = "identity") +
            coord_flip() +
            #facet_grid(rows = vars(Reps), drop = TRUE, scale = "free", space = "free_x", switch = "y")
        facet_grid(rows = vars(Reps), drop = TRUE, scale = "free", space = "free_x", switch = "y", 
              labeller = labeller(Reps = Short.labs)) 
      
      p <- p + 
        ylab("Relative Abundance [%] \n") + xlab("") + 
        labs(fill="") + #atheme+
        scale_fill_manual(values = color_mapping) +
        ylim(0, 60) +
            theme(axis.title.y=element_blank(),
            axis.text.y=element_blank(),
            axis.ticks.y=element_blank()) +
        theme(
         panel.background = element_rect(fill='transparent'), #transparent panel bg
         plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
         ) +
      theme(axis.title.x.bottom =  element_text(color="grey13", size=15, face ="bold"), 
        strip.text = element_text(color = "black", face= "bold")) +
      theme(
        legend.title=element_text(size=9), 
        legend.text=element_text(size=9), 
        axis.text.x.bottom = element_text(size= 15, face = "bold", angle = 45, hjust = 1),
        #strip.text.y.left = element_text(size = 9,face = "bold"), 
        #axis.title.y.left = element_text(size=12,face = "bold")
        ) +
        theme(legend.position = "none")
      
      g <- ggplot_gtable(ggplot_build(p))
        stripr <- which(grepl('strip-l', g$layout$name))
        fills <- alpha(COL, 0.8)
        k <- 1
        for (iii in stripr) {
        j <- which(grepl('rect', g$grobs[[iii]]$grobs[[1]]$childrenOrder))
        g$grobs[[iii]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
        k <- k+1}
        
    Alpha_Diversity_BarPlot <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
    Alpha_Diversity_BarPlot <- plot_grid(Alpha_Diversity_BarPlot, ncol = 1, rel_heights = c(100))
    ggsave(Alpha_Diversity_BarPlot, filename = FILENAME, path = pathPlots, device='png', dpi=300, width = 2.7, height = HEIGHT)
    
    }
}

7.1.3 Indicator LOC

https://cran.r-project.org/web/packages/indicspecies/vignettes/IndicatorSpeciesAnalysis.html

#install.packages("indicspecies")
IndicatorSpecies_Reps_list <- list()
for (Dataset in names(pslist)[!grepl("WF", names(pslist))][grepl("ps_GC|ps_OE", names(pslist)[!grepl("WF", names(pslist))])]) {
  
    require(indicspecies)
    Datasetname <- sub("ps_", "", Dataset)
    IndicatorSpecies_Reps_list_length <- length(IndicatorSpecies_Reps_list)
  
  
      ps <- pslist[[Dataset]]
      #Comparison  <- "Reps"
      #Interested_Comparisons <- RepOrder[RepOrder %in% sample_data(ps)$Reps]
      
      Comparison  <- "LOC"
      Interested_Comparisons <- LOCOrder[LOCOrder %in% sample_data(ps)$LOC]
    
    result <- indicspecies::multipatt(
      otu_table(ps), 
      sample_data(ps)[[paste(Comparison)]], 
      #func = "IndVal.g", 
      control = how(nperm = 999))
  
    df_list <- split(result$sign, result$sign$index)
    Groups <- as.vector(dimnames(result$comb))[[2]]
    
    for (i in seq_along(names(df_list))) {
      names(df_list)[[i]] <- Groups[i]}

    filtered_df_list <- list()

    for (group_name in names(df_list)[-length(df_list)]) {
      filtered_df <- df_list[[group_name]]
      filtered_df <- filtered_df[filtered_df$p.value < 0.05,]
      filtered_df_list[[group_name]] <- filtered_df}
    
    filtered_df_list[[names(df_list)[length(df_list)]]] <- df_list[[length(df_list)]]

    ordered_df_list <- lapply(filtered_df_list, function(df) df[order(df$stat, decreasing = T), ])
    #cat(paste(shQuote(names(ordered_df_list), type="cmd"), collapse=", ")) 
    
    ordered_df_list_intest <- ordered_df_list[names(ordered_df_list) %in% Interested_Comparisons]
    ordered_df_list_intest_0.7 <-list()
    
    for (group_name in names(ordered_df_list_intest)) {
      filtered_df <- df_list[[group_name]]
      filtered_df <- filtered_df[filtered_df$stat > 0.7, ]
      ordered_df_list_intest_0.7[[group_name]] <- filtered_df
    }
    
    print(Datasetname)
    lapply(ordered_df_list_intest, dim)
    lapply(ordered_df_list_intest_0.7, dim)

    IndicatorSpecies_Reps_list[[IndicatorSpecies_Reps_list_length+1]] <- ordered_df_list_intest_0.7
    names(IndicatorSpecies_Reps_list)[[IndicatorSpecies_Reps_list_length+1]] <- paste(Datasetname, Comparison, sep="_")
}
## [1] "OE"
## [1] "GC"
pslist$IndicatorSpecies_list_LOC <- IndicatorSpecies_Reps_list

##############
#Add Taxonomy#
##############
  InDat_list_Species <- IndicatorSpecies_list$SSU_Species
  for (IndDat in names(InDat_list_Species)) {
    InDat_list_Species[[IndDat]]$ASV <- rownames(InDat_list_Species[[IndDat]])
    InDat_list_Species[[IndDat]] <- left_join(InDat_list_Species[[IndDat]],
    as.data.frame(tax_table(merge_phyloseq(pslist$ps_OE, pslist$ps_GC, pslist$ps_WF))) %>% 
      mutate(ASV = rownames(.)))}

#############################
#Extract Bacterial Biomarker#
#############################

  
  CompsOfInterest_LOC <- c(
        "MG-713", 
        "BB-692", 
        "SS-665", 
        "TF-651", 
        "ML-633")

  InDat_list_OE_LOC <- pslist$IndicatorSpecies_list_LOC$OE_LOC
      for (IndDat in names(InDat_list_OE_LOC)) {
      InDat_list_OE_LOC [[IndDat]]$ASV <- rownames(InDat_list_OE_LOC [[IndDat]])
      InDat_list_OE_LOC [[IndDat]] <- left_join(InDat_list_OE_LOC [[IndDat]],
      as.data.frame(tax_table(pslist$ps_OE)) %>% mutate(ASV = rownames(.)))}
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
  IndicatorTaxa_OE_LOC <- unique(as.vector(unlist(lapply(InDat_list_OE_LOC[
                            names(InDat_list_OE_LOC) %in% CompsOfInterest_LOC],function(df) df[["ASV"]])
                            )))  
  
  InDat_list_GC_LOC <- pslist$IndicatorSpecies_list_LOC$GC_LOC
      for (IndDat in names(InDat_list_GC_LOC )) {
      InDat_list_GC_LOC [[IndDat]]$ASV <- rownames(InDat_list_GC_LOC [[IndDat]])
      InDat_list_GC_LOC [[IndDat]] <- left_join(InDat_list_GC_LOC [[IndDat]],
      as.data.frame(tax_table(pslist$ps_GC)) %>% mutate(ASV = rownames(.)))}
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
  IndicatorTaxa_GC_LOC <- unique(as.vector(unlist(lapply(InDat_list_GC_LOC[
                            names(InDat_list_GC_LOC) %in% CompsOfInterest_LOC],function(df) df[["ASV"]])
                            )))    

7.1.4 Extract

IndicSpecies_list <- list()

for (Dataset in names(pslist$IndicatorSpecies_list_LOC)) {
  
  IndicatorSpecies_list <- pslist$IndicatorSpecies_list_LOC
    Datasetname <- sub("_LOC", "", Dataset)
  
    if (Datasetname %in% c("Fish")) {
      ps <- merge_phyloseq(pslist$ps_OE, pslist$ps_GC)
      Comparison  <- "Species"
      Interested_Comparisons <- c(
        "GC", 
        "OE"
      )
    } else if (Datasetname %in% c("SSU")) {
      ps <- merge_phyloseq(pslist$ps_OE, pslist$ps_GC, pslist$ps_WF)
      #ps <- filter_taxa(ps, function (x) {sum(x) > 50}, prune=TRUE)
      Comparison  <- "Kind"
      Interested_Comparisons <- c(
        "Fish",
        "Water"
        )
    } else if (Datasetname %in% c("OE", "GC")) {
      ps <- pslist[[paste("ps",Datasetname, sep="_")]]
      Comparison  <- "LOC"
      Interested_Comparisons <- c(
        "MG-713", 
        "BB-692", 
        "SS-665", 
        "TF-651", 
        "ML-633"
      )
    }

    for (IndDat in names(IndicatorSpecies_list[[Dataset]])) {
      
      df <- IndicatorSpecies_list[[Dataset]][[IndDat]]
      
      ps_sub <- prune_samples(sample_names(ps)[sample_data(ps)[[Comparison]] == IndDat], ps)
      
      df <- df %>% 
        mutate(ASV = rownames(.)) %>% 
        left_join(
          as.data.frame(tax_table(ps_sub)) %>% 
          mutate(ASV = rownames(.))
          ) %>% 
        left_join(as.data.frame(t(otu_table(ps_sub %>%
          transform_sample_counts(function(x) {x/sum(x)*100})))) %>% 
          mutate(ASV = rownames(.))
          ) %>% 
        left_join(
          as.data.frame(t(otu_table(ps_sub %>%
          transform_sample_counts(function(x) {x/sum(x)*100})))) %>% 
          dplyr::mutate(ASVmeans = rowMeans(.)) %>%
          dplyr::mutate(ASV = rownames(.)) %>% 
          dplyr::select(ASV, ASVmeans)
          ) %>% 
        dplyr::arrange(desc(ASVmeans))  %>%
        dplyr::select(ASVmeans, everything())
    
      rownames(df) <- df$ASV
        
      write.csv2(df, file.path(path_Output_16S,paste(paste(save_name,  "IndicatorSpecies", Datasetname , Comparison, IndDat, Date, sep="_"), ".csv", sep="")))
  }
}
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`
## Joining with `by = join_by(ASV)`

#-

7.2 Aldex2

# VARIABLE <- "Species"
# ps <- pslist$ps_Fish
# asv_dat <- data.frame(t(phyloseq::otu_table(ps)))
# metadata <- data.frame(phyloseq::sample_data(ps))
# taxa <- data.frame(phyloseq::tax_table(ps)) %>%
#   rownames_to_column(var="ASV")
# 
#   ########
#   #ALDEX2#
#   ########
#   ALDEx2_results <- ALDEx2::aldex(asv_dat, metadata[[VARIABLE]], test="t", effect = TRUE, denom="iqlr")
#   ALDEx2::aldex.plot(ALDEx2_results , type="MW", test="wilcox", called.cex = 1, cutoff = 0.05)
#   ALDEx2_results_sig <- ALDEx2_results %>%
#     rownames_to_column(var = "ASV") %>%
#     filter(wi.eBH < 0.05) %>%
#     arrange(effect, wi.eBH) %>%
#     dplyr::select(ASV, diff.btw, diff.win, effect, wi.ep, wi.eBH) %>%
#      left_join( #Add relative ASVmeans 
#        data.frame(t(phyloseq::otu_table(ps %>%
#         transform_sample_counts(function(x) {x/sum(x)*100})))) %>%
#           dplyr::mutate(ASVmeans = rowMeans(.)) %>%
#           dplyr::mutate(ASV = rownames(.)) %>% 
#           dplyr::select(ASV, ASVmeans)
#     ) %>%
#     left_join(taxa) %>% #Add taxonomy
#     left_join( #Add relative ASV data per sample
#         data.frame(t(phyloseq::otu_table(ps %>%
#         transform_sample_counts(function(x) {x/sum(x)*100})))) %>%
#         rownames_to_column(var = "ASV")
#     ) %>%
#     dplyr::arrange(desc(ASVmeans))
#   
#   rowclusternum <-2
#   columnclusternum <-2
#   HEIGHT <-5
#   WIDTH <- 6
#   CoreSet <- "Fish"
#   FILENAME    <- paste(paste(save_name,"ALDEX2_Species", sep="_"), ".png", sep="")
#   TITLE       <- draw_label_themeRK(paste("ALDEX2"), element = "plot.title", x =  0.05, hjust = 0, vjust = 1)
#   
#   #OmicsData <- data.frame(t(phyloseq::otu_table(ps %>%
#   #      transform_sample_counts(function(x) {x/sum(x)*100}))))
#   
#   OmicsData <- data.frame(t(phyloseq::otu_table(microbiome::transform(ps, "clr"))))
#   
#   BacteriaHeatPlotRK7_withCore(OmicsData = OmicsData, 
#                                Species = Species, 
#                                genes_of_interest = ALDEx2_results_sig$ASV, 
#                                Samples = sample_names(pslist$clr_Fish), 
#                                SAMDF = data.frame(sample_data(pslist$clr_Fish)),  
#                                TITLE = TITLE, 
#                                filename= FILENAME, 
#                                OutlineColor="grey20", SHOW_ROW_NAMES = TRUE, SHOW_ROW_NAMES_ALL = TRUE, 
#                                ZScore = TRUE
#                                )

7.2 Combine ALDEX2 & IndicatorSpecies

# IndicSpecies_Species <- c(rownames(pslist$IndicatorSpecies_list$Fish_Species$GC), rownames(pslist$IndicatorSpecies_list$Fish_Species$OE))
# 
# ALDEx2_results_sig
# 
# ALDEx2_results_sig$ASV 
# IndicSpecies_Species[IndicSpecies_Species %in% ALDEx2_results_sig$ASV]
# 
# ALDEx2_results_sig[ALDEx2_results_sig$ASV %in% IndicSpecies_Species,]$ASV
# 
# 
#  BacteriaHeatPlotRK7_withCore(OmicsData = OmicsData, 
#                                Species = Species, 
#                                genes_of_interest = ALDEx2_results_sig[ALDEx2_results_sig$ASV %in% IndicSpecies_Species,]$ASV, 
#                                Samples = sample_names(pslist$clr_Fish), 
#                                SAMDF = data.frame(sample_data(pslist$clr_Fish)),  
#                                TITLE = TITLE, 
#                                filename= FILENAME, 
#                                OutlineColor="grey20", SHOW_ROW_NAMES = TRUE, SHOW_ROW_NAMES_ALL = TRUE, 
#                                ZScore = TRUE
#                                )

7.3 Ancom

super slow

  ##########
  #ANCOMBC2#
  ##########
  # TSE <-mia::makeTreeSummarizedExperimentFromPhyloseq(ps)
  # ANCOMBC_results <- ANCOMBC::ancombc2(data = TSE, assay_name = "counts", #tax_level = "ASV",
  #                 fix_formula = "Species")
  # 
  # res_prim = output$res
  # df_age = res_prim %>%
  #   dplyr::select(taxon, ends_with("age")) 
  # df_fig_age = df_age %>%
  #   dplyr::filter(diff_age == 1) %>% 
  #   dplyr::arrange(desc(lfc_age)) %>%
  #   dplyr::mutate(direct = ifelse(lfc_age > 0, "Positive LFC", "Negative LFC"),
  #                 color = ifelse(passed_ss_age == 1, "aquamarine3", "black"))
  # df_fig_age$taxon = factor(df_fig_age$taxon, levels = df_fig_age$taxon)
  # df_fig_age$direct = factor(df_fig_age$direct, 
  #                          levels = c("Positive LFC", "Negative LFC"))
  # 
  # fig_age = df_fig_age %>%
  #   ggplot(aes(x = taxon, y = lfc_age, fill = direct)) + 
  #   geom_bar(stat = "identity", width = 0.7, color = "black", 
  #            position = position_dodge(width = 0.4)) +
  #   geom_errorbar(aes(ymin = lfc_age - se_age, ymax = lfc_age + se_age), 
  #                 width = 0.2, position = position_dodge(0.05), color = "black") + 
  #   labs(x = NULL, y = "Log fold change", 
  #        title = "Log fold changes as one unit increase of age") + 
  #   scale_fill_discrete(name = NULL) +
  #   scale_color_discrete(name = NULL) +
  #   theme_bw() + 
  #   theme(plot.title = element_text(hjust = 0.5),
  #         panel.grid.minor.y = element_blank(),
  #         axis.text.x = element_text(angle = 60, hjust = 1,
  #                                    color = df_fig_age$color))
  # fig_age
  # 
  # 
  # 
  # ########
  # #DESEQ2#
  # ########
  # require(DESeq2)
  # countData = round(as(asv_dat, "matrix"), digits = 0)
  # 
  # dds <- DESeqDataSetFromMatrix(countData =round(as(asv_dat, "matrix"), digits = 0), colData = metadata,
  #       design =as.formula(paste("~",VARIABLE)))
  # dds  <- DESeq(dds)
  # vst   <- varianceStabilizingTransformation(dds)
  #   
  # VARIABLE <- variable
  #  
  # mylist <- list() 
  # for (i in 1:nrow(A)) {
  # res <- results(ddslist[[Dataset]], lfcThreshold = 0.0, alpha=0.05, contrast=c(VARIABLE,A[i,1],A[i,2]))
  # mylist[[i]] <- res 
  # names(mylist)[[i]] <- A[i,3]}
  # 
  # mylist2 <-list()
  # for (x in names(mylist)) {
  # mylist2[[x]]<-dplyr::left_join(rownames_to_column(as.data.frame(mylist[[x]])),  
  #                         rownames_to_column(as.data.frame(VST[rownames(VST) %in%
  #                         rownames(as.data.frame(mylist[[x]])),])))
  # 
  # mylist2 <- lapply(mylist2,na.omit)
  # paste("Dim with NAs")
  # paste(sapply(mylist2,dim))
  # paste("Bacterial species with Basemean lower 1")
  # paste(sapply(lapply(mylist2, function(y) {y[which(y$baseMean < 1),]}),dim))
  # mylist2 <- lapply(mylist2, function(y) {y[which(y$padj < alpha),]})
  # paste("Dim NA omit")
  # paste(sapply(mylist2,dim))
  # mylist2 <- lapply(mylist2, function(z){z[order(z$padj),]})
  # mylist2 <- lapply(mylist2, function(x) {x$sign <-ifelse(sign(x$log2FoldChange) > 0, "enriched", "diminished");return(x)})
  # }

#-

7.4 DESeq2

variable <- "Season"
ddslist <- list()
for (Dataset in names(pslist)[!grepl("WF", names(pslist))][grepl("ps_GC|ps_OE", names(pslist)[!grepl("WF", names(pslist))])]){
  paste("Code Raphael Koll raphael.koll@uni-hamburg.de")
  require(phyloseq); require(DESeq2); require(tidyverse); require(plyr); require(dplyr)
  Datasetname <- paste(Dataset); print(Datasetname)
  a <- length(ddslist)
  ps <- pslist[[Dataset]]
  #sample_data(ps)[[variable]] <- as.factor(sample_data(ps)[[variable]])
  #dds <- phyloseq_to_deseq2(ps, as.formula(paste("~",variable)))
  #dds <- DESeq(dds)
  
    if (grepl("WF", Datasetname)) {
    VARIABLE <- "Kind"
    } else {
    VARIABLE <- variable
    }
  
    if (!taxa_are_rows(ps)) {
        ps <- t(ps)}
    countData = round(as(otu_table(ps), "matrix"), digits = 0)
    SAMDF<-SAMDF16S[SAMDF16S$SampleID %in% rownames(sample_data(ps)),]
    library(DESeq2)
    dds<- DESeqDataSetFromMatrix(countData =
        as.matrix(as.data.frame(countData)[SAMDF$SampleID]), colData = SAMDF,
        design =as.formula(paste("~",VARIABLE)))
    dds  <- DESeq(dds)
    vst   <- varianceStabilizingTransformation(dds)
    ddslist [[a+1]] <- ps
    names(ddslist )[[a+1]] <- paste("ps", sub("ps", "\\1", Datasetname), sep="")
    ddslist [[a+2]] <- dds
    names(ddslist )[[a+2]] <- paste("dds", sub("ps", "\\1", Datasetname), sep="")
    ddslist [[a+3]] <- vst
    names(ddslist )[[a+3]] <- paste("vst", sub("ps", "\\1", Datasetname), sep="")
}
## [1] "ps_OE"
## [1] "ps_GC"
for (Dataset in names(ddslist)[grepl("dds", names(ddslist))]) {
  paste("Code Raphael Koll raphael.koll@uni-hamburg.de")
  require(DESeq2); require(tidyverse); require(plyr); require(dplyr)
  tryCatch({
  a       <- length(ddslist)
  Datasetname      <- sub('....', '', Dataset); print(Datasetname)
  name2   <- ddslist[names(ddslist)[grepl(paste(Datasetname), names(ddslist))]]
  VST     <- assay(name2[[names(name2)[grepl("vst", names(name2))]]])
  SAMDF   <- data.frame(sample_data(name2[[names(name2)[grepl("ps", names(name2))]]]))
  
  ###############Selection of Comparisons################
  VariableOrder<-VariableOrder
  A <- as.data.frame(t(combn(SAMDF %>% 
  arrange(factor(Season, levels = VariableOrder)) %>% #Chane hard coded Variable name here! 
  pull(paste(variable)) %>% 
  unique(),2)))
  A$V3<-Reduce(function(...) paste(..., sep = "-"), A)
  #######################################################
  
  #  if (grepl("WF", i)) {
  #   VARIABLE <- "Kind"
  #   A <-data.frame(c("Fish"="Fish"), c("Water"="Water"), c("Fish-Water"= "Fish-Water"))
  # } else {
  #   VARIABLE <- variable
  # }
 
  VARIABLE <- variable
   
  mylist <- list() 
  for (i in 1:nrow(A)) {
  res <- results(ddslist[[Dataset]], lfcThreshold = 0.0, alpha=0.05, contrast=c(VARIABLE,A[i,1],A[i,2]))
  mylist[[i]] <- res 
  names(mylist)[[i]] <- A[i,3]}
  
  mylist2 <-list()
  for (x in names(mylist)) {
  mylist2[[x]]<-dplyr::left_join(rownames_to_column(as.data.frame(mylist[[x]])),  
                          rownames_to_column(as.data.frame(VST[rownames(VST) %in%
                          rownames(as.data.frame(mylist[[x]])),])))

  mylist2 <- lapply(mylist2,na.omit)
  paste("Dim with NAs")
  paste(sapply(mylist2,dim))
  paste("Bacterial species with Basemean lower 1")
  paste(sapply(lapply(mylist2, function(y) {y[which(y$baseMean < 1),]}),dim))
  mylist2 <- lapply(mylist2, function(y) {y[which(y$padj < alpha),]})
  paste("Dim NA omit")
  paste(sapply(mylist2,dim))
  mylist2 <- lapply(mylist2, function(z){z[order(z$padj),]})
  mylist2 <- lapply(mylist2, function(x) {x$sign <-ifelse(sign(x$log2FoldChange) > 0, "enriched", "diminished");return(x)})
  }
  
  ddslist[[a+1]] <- mylist2
  names(ddslist)[[a+1]] <- paste("res", sub("dds", "\\1", Datasetname), sep="")},
  error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
## [1] "OE"
## [1] "GC"
names(ddslist)
## [1] "ps_OE"  "dds_OE" "vst_OE" "ps_GC"  "dds_GC" "vst_GC" "resOE"  "resGC"
paste("Differentially Abundant Taxa")
## [1] "Differentially Abundant Taxa"
sapply(ddslist$"resGC" ,dim)[1,]
## Summer_21-Autumn_21 Summer_21-Winter_22 Summer_21-Spring_22 Autumn_21-Winter_22 
##                 324                 439                 518                 211 
## Autumn_21-Spring_22 Winter_22-Spring_22 
##                 342                 228
###########
#Save Data#
###########
saveRDS(ddslist, file = paste0(file.path(path_Output_16S, paste(paste(save_name,"16S_DAT_Replicates_pairwise", Date, sep="_"), ".rds", sep=""))))

ddslist<-readRDS(file = paste0(file.path(path_Output_16S, paste(paste(save_name, "16S_DAT_Replicates_pairwise", Date, sep="_"), ".rds", sep=""))))

7.4.1 DAT visualization

7.4.2 Pairwise Barplots

Creates barplots of pairwise comparisons, excluded here as there are many

###################
#Pairwise Barplots#
###################
# for (i in names(ddslist)[grepl("res", names(ddslist))]){
#   paste("Code Raphael Koll raphael.koll@uni-hamburg.de")
#   require(plyr); require(ggrepel); require(cowplot);  require(scales)
#   tryCatch({
#   g       <- paste(i) 
#   gg      <- sub('....', '', g)
#   res     <- ddslist[[i]]
#   FILENAME    <- paste(paste(Species, gg, Type, "LCF0"), ".png", sep="")
#   TITLE       <- paste(Species, gg, Type, "Differential abundant taxa", sep=" ")
#   for (x in names(res)){ 
#         tryCatch({
#   res_tax_sig <- res[[x]]
#   xx       <- paste(x)
#   #res <- res[res$baseMean >= 10,] #Select Species with Basemean higher 10
#   res_tax_sig<-res_tax_sig[order(res_tax_sig$baseMean, decreasing=T), ] #order for BaseMean
#   res_tax_sig_head <-head(res_tax_sig, 20)
#   rownames(res_tax_sig_head) <-res_tax_sig_head$rowname
#   p <- ggplot(data=res_tax_sig_head, aes(x=fct_reorder(rownames(res_tax_sig_head), baseMean), 
#                                     y= log2FoldChange, fill = baseMean)) +
#   geom_bar(stat="identity") + coord_flip() +
#   scale_fill_gradient(low = "white", high = "red", breaks = c(10, 100, 500, 1000), limits=c(10,500), oob=squish) +
#   geom_text(aes(label = round(baseMean, 0)), color="black", size=2.5)+
#   theme(axis.text.y = element_text(size = 8), strip.text.x = element_text(size = 7),
#         legend.title = element_text( size=6), legend.text=element_text(size=6), 
#         axis.title = element_text(size = 8)) +  xlab("") + ylab("log2 FoldChange") 
#   prow <- cowplot::plot_grid(p, labels = c(""), ncol = 1)
#   title <- cowplot::ggdraw() + draw_label_themeRK(paste(paste(TITLE, ":", sep=""), x, sep = "       "), 
#                 element = "plot.title",x = 0.05, hjust     = 0, vjust = 1)
#         subtitle <- cowplot::ggdraw() + draw_label_themeRK(paste("top 20 of", table(sign(res_tax_sig$log2FoldChange) > 0)[2],"enriched,", table(sign(res_tax_sig$log2FoldChange) > 0)[1], "diminished species,", "[tax-glom on species level,", dim(otu_table(ddslist[[paste("ps_", noquote(gg), sep="")]]))[1],"taxa]", sep = " "), element = "plot.subtitle",x = 0.05, hjust = 0, vjust = 1)
# if (OperatingSystem == "Mac" ) { quartz() }
# A<- plot_grid(title, subtitle, prow, ncol = 1, rel_heights = c(0.05, 0.05, 0.98))
#   plot(A)
#   ggsave(A, filename = paste(paste(save_name, Type, "DESeq2-pairwise", xx, sep="_"), ".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8,
#   height = 8)},
#         error=function(e){cat("ERROR :",conditionMessage(e), "\n")})}
#   },error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
# }

7.4.3 Aggregating DESeq2-Pairwise Comparisons

SSU_Data_list <- list()
for (Dataset in c(names(pslist)[!grepl("WF", names(pslist))][grepl("ps_GC|ps_OE", names(pslist)[!grepl("WF", names(pslist))])], "ps_WF")) {

  require(plyr)
  require(dplyr)
  require(phyloseq)

  Datasetname <- sub("ps_", "", Dataset)
  ps            <- pslist[[Dataset]]
  SSU_Data_list_length <- length(SSU_Data_list)
  
  
  TAX <- as.data.frame(tax_table(ps))
  OTU <- as.data.frame(t(otu_table(ps)))
  REFSEQ <- refseq(ps)
  REFSEQ <- stack(as.character(REFSEQ, use.names=TRUE))
  colnames(REFSEQ)[colnames(REFSEQ) == "ind"] <- "ASV"

  #################################
  #Creating AverageSums per Season#
  #################################
  SeasonSums <- ps %>%
    transform_sample_counts(function(x) {x/sum(x)*100}) %>%
    phyloseq::otu_table() %>%
    as.data.frame() %>%
    rownames_to_column(var = "SampleID") %>%
    dplyr::left_join(SAMDF16S, by = c("SampleID" = "SampleID")) %>%
    dplyr::group_by(Season) %>%
    dplyr::summarise(dplyr::across(rownames(tax_table(ps)), mean, na.rm = TRUE)) %>% 
    t() %>%
    as.data.frame() %>%
    `colnames<-`(.[1, ]) %>%
    .[-1, ] %>%
    setNames(paste0("avg_", colnames(.))) %>%
    mutate_all(as.numeric) %>%
    rownames_to_column(var="ASV")

    SSU_Data  <- TAX %>%  
     rownames_to_column(var = "ASV") %>% 
       left_join( #Add relative ASVmeans 
       data.frame(t(phyloseq::otu_table(ps %>%
        transform_sample_counts(function(x) {x/sum(x)*100})))) %>%
          dplyr::mutate(ASVmeans = rowMeans(.)) %>%
          dplyr::mutate(ASV = rownames(.)) %>% 
          dplyr::select(ASV, ASVmeans)
       ) %>%
    left_join( #Add Sums by Season
        SeasonSums
       ) %>%
    left_join( #Add relative ASV data per sample
        data.frame(t(phyloseq::otu_table(ps %>%
        transform_sample_counts(function(x) {x/sum(x)*100})))) %>%
        rownames_to_column(var = "ASV")
    ) %>%
     left_join(REFSEQ
    ) %>%
      dplyr::arrange(desc(ASVmeans))
   
   rownames(SSU_Data) <- SSU_Data$ASV


  SSU_Data_list[[SSU_Data_list_length+1]] <- SSU_Data
  names(SSU_Data_list)[[SSU_Data_list_length+1]] <- Datasetname 
}

###########################################################
#Determine the Cluster-Numbers from the unclustered-Heatmap
rowclusternum        <- 1
columnclusternum     <- 1
for (i in names(ddslist)[grepl("dds", names(ddslist))]) {
  paste("Code Raphael Koll raphael.koll@uni-hamburg.de code adapted from https://github.com/kevinblighe/E-MTAB-6141")
  tryCatch({  
  min_count   <- 1
  a           <- length(ddslist)
  g           <- paste(i)
  gg          <- sub('....', '', g)
  FILENAME    <- paste(paste(Species, gg, Type, "LCF0"), ".png", sep="")
  TITLE       <- draw_label_themeRKwhite(paste(Species, gg, Type, "DAT"), element = "plot.title", x =  0.05, hjust = 0, vjust = 1)
  name2       <- ddslist[names(ddslist)[grepl(paste(gg), names(ddslist))]]
  vst         <- assay(name2[[names(name2)[grepl("vst", names(name2))]]])
  ps          <-  name2[[names(name2)[grepl("ps", names(name2))]]]
  SAMDF       <- SAMDF16S[SAMDF16S$SampleID %in% rownames(sample_data(ps)),]
  res         <- name2[[names(name2)[grepl("res", names(name2))]]] #res
  res         <- lapply(res, function(z){z[z$baseMean >= min_count,]})
  
  resgenelist   <- list()
    for (ii in names(res)) {
      genes <- res[[ii]]$rowname
      resgenelist[[ii]] <- genes}
  resgenelist <- unique(as.vector(unlist(resgenelist)))

  
  
  A           <- BacteriaHeatPlotRK6(vst = vst, Species = Species, min_count = min_count, 
                          genes_of_interest = resgenelist, Samples = colnames(vst), SAMDF = SAMDF,  TITLE = TITLE)
  A 
   #Save heatmap to Environment
  #b           <- length(.GlobalEnv[[name]])
  #.GlobalEnv[[name]][[b+1]]        <- hmap
  #names(.GlobalEnv[[name]])[[b+1]] <- paste("hmap", sub("dds6", "\\1", g), sep="-")
  
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})}

############################################################
#Cluster Heatmap with Extraction of Genes/Taxa from Cluster#
rowclusternum        <- 4
columnclusternum     <- 4
Cluster       <- list()
for (Dataset in names(ddslist)[grepl("dds", names(ddslist))]) {
  paste("Code Raphael Koll raphael.koll@uni-hamburg.de")
  tryCatch({  
  min_mean_count   <- 10
  a           <- length(Cluster)
  Datasetname          <- sub('....', '', Dataset)
  CoreSet <- Datasetname
  FILENAME    <- paste(paste(save_name, Type, "LCF0", "MinMeanGroup", min_mean_count, Datasetname, sep="_"), ".png", sep="")
  TITLE       <- draw_label_themeRK(paste(Datasetname, Type, "DAT"), element = "plot.title", x =  0.05, hjust = 0, vjust = 1)


  name2       <- ddslist[names(ddslist)[grepl(paste(Datasetname), names(ddslist))]]
  vst         <- assay(name2[[names(name2)[grepl("vst", names(name2))]]])
  clr         <- as.data.frame(assay(pslist[[paste("TSEc.l.r", sub("dds", "", Dataset), sep="")]]))
  ps          <- name2[[names(name2)[grepl("ps", names(name2))]]]
  SAMDF       <- SAMDF16S[SAMDF16S$SampleID %in% rownames(sample_data(ps)),]
  res         <- name2[[names(name2)[grepl("res", names(name2))]]] #res

  WIDTH = 10 + length(SAMDF$SampleID) *0.05
  
  ##############################################
  #Filter for min_mean_count per sampling group#
  ASVs_to_keep <- list()
  for (Replicates2 in unique(SAMDF[[variable]])) {
    ASVs_to_keep_length <- length(ASVs_to_keep)
    df <- as.data.frame(otu_table(ps))
    df_order <- df[names(df) %in% SAMDF[SAMDF[variable] == Replicates2,]$SampleID]
    df_order[] <- sapply(df_order, as.numeric)
    df_order <- df_order[rowMeans(df_order) > min_mean_count,]
    df_order <- rownames(df_order)
    
  ASVs_to_keep[[ASVs_to_keep_length+1]] <- df_order
  names(ASVs_to_keep)[[ASVs_to_keep_length+1]] <- paste(Replicates2)
  }
  ASVs_to_keep <- unique(as.vector(unlist(ASVs_to_keep)))
  
  ########################
  #Extract Deseq2 Results#
  resgenelist   <- list()
    for (ii in names(res)) {
      genes <- res[[ii]]$rowname
      resgenelist[[ii]] <- genes}
  resgenelist <- unique(as.vector(unlist(resgenelist)))
  
  #####################################################
  #Plot Hamp of z-Score CLR filtered for MinMean_Count#
  resgenelist <- resgenelist[resgenelist %in% ASVs_to_keep]
    # hmap <-  BacteriaHeatPlotRK7(OmicsData = CLR, Species = Species, 
    #                     genes_of_interest = resgenelist, Samples = rownames(SAMDF), 
    #                     SAMDF = SAMDF,  TITLE = TITLE, filename= FILENAME)
  BacteriaHeatPlotRK7_withCore(OmicsData = clr, 
                               Species = Species, 
                               genes_of_interest = resgenelist, 
                               Samples = SAMDF$SampleID, 
                               SAMDF = SAMDF,  
                               TITLE = TITLE, 
                               filename= FILENAME, 
                               OutlineColor="grey20",  
                               SHOW_ROW_NAMES = T, 
                               SHOW_ROW_NAMES_ALL = F,  
                               ZScore =  F)
  
  #################################
  #Extract Genes/Taxa from Heatmap#
    hmap        <- draw(hmap)
    clrow       <- row_order(hmap)
    genecluster <- lapply(names(clrow), function(x){
         out  <- data.frame(GeneID = rownames(heat[clrow[[x]],]),
                    clrow = paste0(x),stringsAsFactors = FALSE)
            return(out)}) %>%   
         do.call(rbind, .)
    
  ######################
  #Save Cluster Results#
  Cluster1    <- genecluster[genecluster$clrow %in% c("Cluster 1"),]
  Cluster2    <- genecluster[genecluster$clrow %in% c("Cluster 2"),]
  Cluster3    <- genecluster[genecluster$clrow %in% c("Cluster 3"),]
  Cluster4    <- genecluster[genecluster$clrow %in% c("Cluster 4"),]
  #Cluster5    <- genecluster[genecluster$clrow %in% c("Cluster 5"),]
  #Cluster6    <- genecluster[genecluster$clrow %in% c("Cluster 6"),]
  Cluster1    <- heat[rownames(heat) %in% Cluster1$GeneID,]
  Cluster2    <- heat[rownames(heat) %in% Cluster2$GeneID,]
  Cluster3    <- heat[rownames(heat) %in% Cluster3$GeneID,]
  Cluster4    <- heat[rownames(heat) %in% Cluster4$GeneID,]
  #Cluster5    <- heat[rownames(heat) %in% Cluster5$GeneID,]
  #Cluster6    <- heat[rownames(heat) %in% Cluster6$GeneID,]
  Cluster[[a+1]] <- as.data.frame(Cluster1)
  Cluster[[a+2]] <- as.data.frame(Cluster2)
  Cluster[[a+3]] <- as.data.frame(Cluster3)
  Cluster[[a+4]] <- as.data.frame(Cluster4)
  #Cluster[[a+5]] <- as.data.frame(Cluster5)
  #Cluster[[a+6]] <- as.data.frame(Cluster6)
  names(Cluster)[[a+1]] <- paste("Cluster1-", sub("dds", "\\1", Datasetname), sep="")
  names(Cluster)[[a+2]] <- paste("Cluster2-", sub("dds", "\\1", Datasetname), sep="")
  names(Cluster)[[a+3]] <- paste("Cluster3-", sub("dds", "\\1", Datasetname), sep="")
  names(Cluster)[[a+4]] <- paste("Cluster4-", sub("dds", "\\1", Datasetname), sep="")
  #names(Cluster)[[a+5]] <- paste("Cluster5-", sub("dds", "\\1", Datasetname), sep="")
  #names(Cluster)[[a+6]] <- paste("Cluster6-", sub("dds", "\\1", Datasetname), sep="")
  
  Cluster[[a+5]] <- hmap
  names(Cluster)[[a+5]] <- paste("hmap", sub("dds", "\\1", Datasetname), sep="")
  
  Cluster[[a+6]] <- HeatPlot_prow
  names(Cluster)[[a+6]] <- paste("HeatPlot_prow", sub("dds", "\\1", Datasetname), sep="")
  },
  error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
## [1] "provide relative abundance data"

## [1] "provide relative abundance data"

###########
#Save Data#
###########
saveRDS(Cluster, file = paste0(file.path(path_Output_16S,paste(paste(save_name, "16S_DAT_Replicates_pw4_Cluster", Date, sep="_"), ".rds", sep=""))))
        
Cluster <-readRDS(file = paste0(file.path(path_Output_16S,paste(paste(save_name, "16S_DAT_Replicates_pw4_Cluster", Date, sep="_"), ".rds", sep=""))))


#Export Data for Cluster
###########################################
#SSU_Data created in 4.2.4 Create SSU_Data#
###########################################

Deseq2Cluster_Taxonomy_list <- list()
for (Dataset in names(ddslist)[grepl("dds", names(ddslist))]) {
  Datasetname <- sub('....', '', Dataset)
  
  ps <- pslist[[paste("ps", Datasetname, sep="_")]]
  SAMDF       <- SAMDF16S[SAMDF16S$SampleID%in% rownames(sample_data(ps)),]
  SSU_Data <- SSU_Data_list[[which(grepl(Datasetname, names(SSU_Data_list)))]]
  
  
  ClusterSet <- Cluster[grepl(Datasetname, names(Cluster))]
  
  Deseq2Cluster_Taxonomy_list_length <- length(Deseq2Cluster_Taxonomy_list)
  
  for (Comparison in names(ClusterSet[grepl("Cluster", names(ClusterSet))])) {

      df <- ClusterSet[[paste(Comparison)]]
      df <- SSU_Data[SSU_Data$ASV %in% rownames(df),]
      
      #df <- df %>% relocate(ASV, .after=SLSU21MLEB9rel)
      rownames(df) <- df$ASV
    
      ASVs_to_keep <- list()
        for (Replicates2 in unique(SAMDF[[variable]])) {
          ASVs_to_keep_length <- length(ASVs_to_keep)
          df_ASV <- as.data.frame(t(otu_table(ps)))
          df_ASV_order   <- df_ASV[names(df_ASV) %in% SAMDF[SAMDF[variable] == Replicates2,]$SampleID]
          df_ASV_order[] <- sapply(df_ASV_order, as.numeric)
          #Subsetting for the actual Cluster#
          df_ASV_order   <- df_ASV_order[rownames(df_ASV_order) %in% rownames(df),]
          df_ASV_rel     <- as.data.frame(t(otu_table(ps %>%
            transform_sample_counts(function(x) {x/sum(x)*100}))))
          
          df_ASV_order_rel <- left_join(df_ASV_order %>%
                                  mutate(ASV = rownames(df_ASV_order)) %>%
                                  select("ASV"), df_ASV_rel[names(df_ASV_rel) %in%
                                  names(df_ASV_order)] %>% mutate(ASV = rownames(df_ASV_rel)))
          
          df_ASV_order_rel_mean <- df_ASV_order_rel %>%
            dplyr::select(-ASV) %>%
            dplyr::summarise(across(everything(), mean, na.rm = TRUE))  %>% rowMeans(.)
          
      ASVs_to_keep[[ASVs_to_keep_length+1]] <- df_ASV_order_rel_mean
      names(ASVs_to_keep)[[ASVs_to_keep_length+1]] <- paste(Replicates2)
      }
      
      max_name <- names(ASVs_to_keep)[which.max(unlist(ASVs_to_keep))]
      
      df_SSU_order <- df[grepl(paste(max_name), names(df)) & grepl("rel", names(df))]
      
      df_SSU_order[] <- sapply(df_SSU_order, as.numeric)
      df_SSU_order<- as.data.frame(sort(rowMeans(df_SSU_order), decreasing = T)) 
      names(df_SSU_order) <-paste("Mean", max_name, sep="_")
      df_SSU_order$ASV <- rownames(df_SSU_order)
      
      df <- left_join(df_SSU_order, df)

      Deseq2Cluster_Taxonomy_list[[ Deseq2Cluster_Taxonomy_list_length+1]] <- df
      names( Deseq2Cluster_Taxonomy_list)[[ Deseq2Cluster_Taxonomy_list_length+1]] <- Comparison
      write.csv2(df, file.path(path_Output_16S,paste(paste(save_name,  "16S_DAT_Replicates",paste("Deseq2_",Comparison, sep=""), Date, sep="_"), ".csv", sep="")))
  }
}

#-

8 Venn Diagramms

#VennDiagramm of Fish and Watersamples
#We merge the phyloseqs here so that we have the filtered ASV per Sample_kind in one dataset
ps <- merge_phyloseq(pslist$ps_OE, pslist$ps_GC, pslist$ps_WF)

Venn_list_ps_Fish <- list(
"OE" = names(rowSums(t(otu_table(subset_samples(ps, SampleID %in% sample_names(ps)[grepl("OE", sample_names(ps))])))) > 1)[rowSums(t(otu_table(subset_samples(ps, SampleID %in% sample_names(ps)[grepl("OE", sample_names(ps))])))) > 1], 
"GC" = names(rowSums(t(otu_table(subset_samples(ps, SampleID %in% sample_names(ps)[grepl("GC", sample_names(ps))])))) > 1)[rowSums(t(otu_table(subset_samples(ps, SampleID %in% sample_names(ps)[grepl("GC", sample_names(ps))])))) > 1], 
"Bacterioplankton" = names(rowSums(t(otu_table(subset_samples(ps, SampleID %in% sample_names(ps)[grepl("WFSU|WFAU|WFWI|WFSP", sample_names(ps))])))) > 1)[rowSums(t(otu_table(subset_samples(ps, SampleID %in% sample_names(ps)[grepl("WFSU|WFAU|WFWI|WFSP", sample_names(ps))])))) > 1]
)
Venn_plot <- ggVennDiagram::ggVennDiagram(Venn_list_ps_Fish , set_color = c("brown1","seagreen", "darkblue"), edge_size=1, set_size = 0) + theme(legend.position = "none")
ggsave(Venn_plot, filename = paste(paste(save_name, "VENN_Species", sep="_"), ".png", sep=""), path = pathPlots , device='png', dpi=300, width = 4, height = 4)
plot(Venn_plot)

Overlapping_Taxa <- Venn_list_ps_Fish$Bacterioplankton[Venn_list_ps_Fish$Bacterioplankton %in% c(Venn_list_ps_Fish$OE, Venn_list_ps_Fish$GC)]


###########################
#Seasonal Presence-Absence#
###########################
data <-  as.data.frame(t(otu_table(ps))) %>%
  rownames_to_column(var = "ASV") %>%
  pivot_longer(2:length(.), names_to = "SampleID", values_to = "count") %>% 
  dplyr::left_join(data.frame(sample_data(ps))) 

Venn_list <- list()
for (SPECIES in unique(sample_data(ps)$Species)) {
  Venn_list[[SPECIES]] <- list()
  for (SEASON in SeasonOrder) {
    filtered_data <- data %>%
      dplyr::filter(Species == SPECIES) %>%
      dplyr::filter(grepl(SEASON, Season)) %>%
      dplyr::group_by(ASV) %>% 
      dplyr::mutate(asv_sum = sum(count)) %>% 
      dplyr::filter(asv_sum > 1) %>%
      dplyr::select(-asv_sum) %>%
      dplyr::select(ASV, SampleID, count) %>%
      pivot_wider(names_from =  SampleID, values_from = count) 
      #column_to_rownames(var = "ASV") 
    Venn_list[[SPECIES]][[SEASON]] <- filtered_data
  }
}

for (SPECIES in unique(sample_data(ps)$Species)) {
  trial <- lapply(Venn_list[[SPECIES]], function(data) data$ASV)
  COL   <- unname(col.Palette$col.Palette.Season[names(col.Palette$col.Palette.Season) %in% names(trial)])
  Venn_plot_Season <- ggVennDiagram::ggVennDiagram(trial, set_color =  COL, edge_size=1, set_size = 0) + 
  theme(legend.position = "none") +
  scale_x_continuous(expand = expansion(mult = .2))
  ggsave(Venn_plot_Season, filename = paste(paste(save_name, "VENN_Season", SPECIES, sep="_"), ".png", sep=""), path = pathPlots , device='png', dpi=300, width = 5, height = 5)
}

plot(Venn_plot_Season)

#-

Save Data

###########
#Save Data#
###########
saveRDS(pslist, file.path(path_Output_16S, paste(paste(save_name, "ps_16S_merge_Filter_ASV_besttax", Date, sep="_"), ".rds", sep="")))